/**** card_instruments.dta
Creates Card instruments
This do-file creates predicted migration flows across
Provincial boundaries to each 2005 defn prefecture.  To calculate
these predicted flows for each type of migrant, it uses the formula

sum over all other provinces i
(total migrants out of Province i to cities other than j in year t)
*(fraction of migrants in year 1990 from i going to j)

Note that in the data sets used, migration from own province is coded as
fromProvince=1.

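Resulting variables are XXXXp00 and XXXXp05 (predicted out-of-province
migrants) and XXXXpa00 and XXXXpa05 (predicted migrants including those
from within the province).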

****/

clear
set more off


******* A. Create Base Correspondence and Data **********
** Builds temp_cencp.dta, the lookup that each migration count file below
** is merged against on countyCode and year.  It carries city05, sampop,
** c_totalPop and province_code.

** Paths use forward slashes: Stata accepts them on every platform, while
** backslashes only resolve on Windows.
use ../../data/tabular_data_BJ/generated/us123-census.dta, clear
** Drop units not in the census
drop if unit_status<1
** The merge key is kept as a string version of census_code
gen countyCode = string(census_code)
** Units with no code (string() renders a missing census_code as ".")
drop if countyCode=="."
keep countyCode city05 year sampop c_totalPop province_code
** Sorted because the old-style merges below need sorted inputs
sort countyCode year
save temp_cencp.dta, replace


*********** B. Create 1990 Base Pathways **********
** Builds two files from the 1990 migrant counts:
**   totals.dta - 1990 migrant totals by city05
**                (XXXXt90 = all origins, XXXXop90 = out of province only)
**   temp90.dta - 1990 share of each origin province's migrants that went
**                to each city05 (XXXXf90 by fromProvince, XXXXfa90 by
**                fromProvince2)

** Calculate fraction of each type of migrant to each prefecture (city05)
** Paths use forward slashes so the do-file also runs outside Windows
use ../../data/census/generated/count1990Mig.dta, clear
gen year = 1990
sort countyCode year
** Old-style match merge: relies on both files being sorted on the keys
merge countyCode year using temp_cencp.dta
tab year _merge
*** 1s are outside our study area
*** 2s have no migrants
keep if (_merge==1|_merge==3) & year==1990
drop _merge

*** Fix fromProvince variables
*** fromProvince2 swaps the own-province code (1) for the destination's
*** province number (province_code/10000) and recodes 50 to 51
drop if fromProvince==0 | fromProvince==.
gen fromProvince2 = fromProvince
replace fromProvince2 = province_code/10000 if fromProvince==1
replace fromProvince2 = 51 if fromProvince2==50

*** Apply Provincial Weights
*** Every count is divided by a constant specific to the destination province
*** NOTE(review): the constants look like 1990 sampling rates - confirm source
foreach X of varlist ruralMig-mig18to55Edu9Greater {
	replace `X' = `X'/0.0104856948259734 if province_code==110000 & year==1990
	replace `X' = `X'/0.0130791966036386 if province_code==120000 & year==1990
	replace `X' = `X'/0.00976147989113532 if province_code==130000 & year==1990
	replace `X' = `X'/0.0104715690183259 if province_code==140000 & year==1990
	replace `X' = `X'/0.0118723679087625 if province_code==150000 & year==1990
	replace `X' = `X'/0.0107576598978953 if province_code==210000 & year==1990
	replace `X' = `X'/0.010814713382742 if province_code==220000 & year==1990
	replace `X' = `X'/0.0110158000569816 if province_code==230000 & year==1990
	replace `X' = `X'/0.0115695700221318 if province_code==310000 & year==1990
	replace `X' = `X'/0.0103751284793056 if province_code==320000 & year==1990
	replace `X' = `X'/0.0104590486930804 if province_code==330000 & year==1990
	replace `X' = `X'/0.0120020335056383 if province_code==340000 & year==1990
	replace `X' = `X'/0.0114160505034443 if province_code==350000 & year==1990
	replace `X' = `X'/0.011929876629665 if province_code==360000 & year==1990
	replace `X' = `X'/0.00980405597741145 if province_code==370000 & year==1990
	replace `X' = `X'/0.0101516748980099 if province_code==410000 & year==1990
	replace `X' = `X'/0.0104176992770507 if province_code==420000 & year==1990
	replace `X' = `X'/0.0103084163513093 if province_code==430000 & year==1990
	replace `X' = `X'/0.0100002807610139 if province_code==440000 & year==1990
	replace `X' = `X'/0.010567331423635 if province_code==450000 & year==1990
	replace `X' = `X'/0.0110877925398804 if province_code==460000 & year==1990
	replace `X' = `X'/0.00980917665888599 if province_code==500000 & year==1990
	replace `X' = `X'/0.00980917665888599 if province_code==510000 & year==1990
	replace `X' = `X'/0.00961913386857969 if province_code==520000 & year==1990
	replace `X' = `X'/0.0101087805269901 if province_code==530000 & year==1990
	replace `X' = `X'/0.0110086019644719 if province_code==540000 & year==1990
	replace `X' = `X'/0.0103511899662564 if province_code==610000 & year==1990
	replace `X' = `X'/0.0108154966257644 if province_code==620000 & year==1990
	replace `X' = `X'/0.0128754084074611 if province_code==630000 & year==1990
	replace `X' = `X'/0.0083057473916061 if province_code==640000 & year==1990
	replace `X' = `X'/0.0102265947680152 if province_code==650000 & year==1990
}

*** Collapse down to rural and urban units in each prefecture
*** One row per destination prefecture and origin province after this step
collapse (sum) ruralMig-mig18to55Edu9Greater, by(city05 fromProvince fromProvince2)
save temp.dta, replace

*** Create 1990 migrant totals data set
*** XXXXt90 keeps every origin; XXXXop90 zeroes the own-province rows
foreach X of varlist ruralMig-mig18to55Edu9Greater {
	gen `X'op90 = `X'
	replace `X'op90 = 0 if fromProvince==1
	rename `X' `X't90
}
collapse (sum) *op90 *t90, by(city05)
sort city05
save totals.dta, replace

** -use- documents the option clear for discarding the data in memory;
** replace is not a documented option of -use-
use temp.dta, clear
** Generate fractions
** Denominators are origin totals over all destinations, including rows
** with missing city05, which is why those rows are dropped only afterwards.
** egen total() is the current name of the legacy sum() function.
foreach X of varlist ruralMig-mig18to55Edu9Greater {
	egen `X't = total(`X'), by(fromProvince)
	gen `X'f90 = `X'/`X't
	egen `X'2t = total(`X'), by(fromProvince2)
	gen `X'fa90 = `X'/`X'2t
}

drop if city05==.

*** Calculate fraction in province
*** Diagnostic output only: tot, frac, tot2 and frac2 are not saved
egen tot = total(ruralMig), by(city05)
gen frac = ruralMig/tot
egen tot2 = total(mig18to55Edu9Greater), by(city05)
gen frac2 = mig18to55Edu9Greater/tot2
**** Distribution of Fraction from Within Province
sum frac if fromProvince==1, detail
sum frac2 if fromProvince==1, detail

keep city05 fromProvince fromProvince2 *f90 *fa90
sort city05 fromProvince fromProvince2

save temp90.dta, replace


*********** 2000 migrant totals and predicted flows **********
** Adds XXXXt00 and XXXXop00 to totals.dta and writes card00.dta with the
** predicted flows XXXXp00 (out of province) and XXXXpa00 (all origins).

** Paths use forward slashes so the do-file also runs outside Windows
use ../../data/census/generated/count2000Mig.dta, clear
gen year = 2000
sort countyCode year
** Old-style match merge: relies on both files being sorted on the keys
merge countyCode year using temp_cencp.dta
tab year _merge
*** 1s are outside our study area
*** 2s have no migrants
keep if (_merge==1|_merge==3) & year==2000
drop _merge

*** Fix fromProvince variables
drop if fromProvince==0 | fromProvince==.
gen fromProvince2 = fromProvince
replace fromProvince2 = province_code/10000 if fromProvince==1
replace fromProvince2 = 51 if fromProvince2==50

*** Scale sample counts up to population counts
*** The first replace is restricted to rows with a known c_totalPop.
*** Without that restriction it ran on rows where c_totalPop is missing
*** (a missing sampop also satisfies sampop~=0), set the count to missing,
*** and left the *100 fallback below multiplying a missing value.
foreach X of varlist ruralMig-mig18to55Edu9Greater {
	replace `X' = `X'*c_totalPop/sampop if year==2000 & sampop~=0 & c_totalPop<.
	replace `X' = `X'*100 if year==2000 & c_totalPop==.
}

collapse (sum) ruralMig-mig18to55Edu9Greater, by(city05 fromProvince fromProvince2)
save temp.dta, replace

*** Create 2000 migrant totals data set
*** XXXXt00 keeps every origin; XXXXop00 zeroes the own-province rows
foreach X of varlist ruralMig-mig18to55Edu9Greater {
	gen `X'op00 = `X'
	replace `X'op00 = 0 if fromProvince==1
	rename `X' `X't00
}
collapse (sum) *op00 *t00, by(city05)
sort city05
merge 1:1 city05 using totals.dta
drop _merge
sort city05
save totals.dta, replace

** -use- documents the option clear for discarding the data in memory;
** replace is not a documented option of -use-
use temp.dta, clear
*** Calculate fraction in province
*** Diagnostic output only; egen total() is the current name of sum()
egen tot = total(ruralMig), by(city05)
gen frac = ruralMig/tot
egen tot2 = total(mig18to55Edu9Greater), by(city05)
gen frac2 = mig18to55Edu9Greater/tot2
**** Distribution of Fraction from Within Province
sum frac if fromProvince==1, detail
sum frac2 if fromProvince==1, detail

sort city05 fromProvince
merge 1:1 city05 fromProvince fromProvince2 using temp90.dta
*1 is 2000 flows that did not occur in 1990
*2 is 1990 flows that did not occur in 2000
*3 is flows that occurred in both years
drop _merge

*** Predicted flow = (origin total minus own flow) * 1990 share
foreach X of varlist ruralMig-mig18to55Edu9Greater {
	* Pathways seen only in 1990 arrive from temp90.dta with a missing 2000
	* count.  That count is a true zero; left missing it would make the
	* prediction missing and the collapse below would drop it.
	replace `X' = 0 if `X'==.
	egen `X't = total(`X'), by(fromProvince)
	gen `X'p00 = (`X't-`X')*`X'f90
	replace `X'p00 = 0 if fromProvince==1
	egen `X'2t = total(`X'), by(fromProvince2)
	gen `X'pa00 = (`X'2t-`X')*`X'fa90
}

*** Pathways seen only in 2000 have a missing 1990 share, so their
*** prediction is missing and adds nothing to the sums
collapse (sum) *p00 *pa00, by(city05)
drop if city05==.

sort city05
save card00.dta, replace


*********** 2005 migrant totals and predicted flows **********
** Adds XXXXt05 and XXXXop05 to totals.dta and leaves the predicted flows
** XXXXp05 (out of province) and XXXXpa05 (all origins) in memory, one row
** per city05.

** Paths use forward slashes so the do-file also runs outside Windows
use ../../data/census/generated/count2005Mig.dta, clear
gen year = 2005
** The merge key in temp_cencp.dta is a string, so convert before merging
tostring countyCode, replace
sort countyCode year
** Old-style match merge: relies on both files being sorted on the keys
merge countyCode year using temp_cencp.dta
tab year _merge
*** 1s are outside our study area
*** 2s have no migrants
keep if (_merge==1|_merge==3) & year==2005
drop _merge

*** Fix fromProvince variables
drop if fromProvince==0 | fromProvince==.
gen fromProvince2 = fromProvince
replace fromProvince2 = province_code/10000 if fromProvince==1
replace fromProvince2 = 51 if fromProvince2==50

*** Scale every count by a flat factor of 500
*** NOTE(review): presumably the inverse sampling rate of the 2005 data - confirm
foreach X of varlist ruralMig-mig18to55Edu9Greater {
	replace `X' = `X'*500 if year==2005
}

collapse (sum) ruralMig-mig18to55Edu9Greater, by(city05 fromProvince fromProvince2)
save temp.dta, replace

*** Create 2005 migrant totals data set
*** XXXXt05 keeps every origin; XXXXop05 zeroes the own-province rows
foreach X of varlist ruralMig-mig18to55Edu9Greater {
	gen `X'op05 = `X'
	replace `X'op05 = 0 if fromProvince==1
	rename `X' `X't05
}
collapse (sum) *op05 *t05, by(city05)
sort city05
merge 1:1 city05 using totals.dta
drop _merge
sort city05
save totals.dta, replace

** -use- documents the option clear for discarding the data in memory;
** replace is not a documented option of -use-
use temp.dta, clear
*** Calculate fraction in province
*** Diagnostic output only; egen total() is the current name of sum()
egen tot = total(ruralMig), by(city05)
gen frac = ruralMig/tot
egen tot2 = total(mig18to55Edu9Greater), by(city05)
gen frac2 = mig18to55Edu9Greater/tot2
**** Distribution of Fraction from Within Province
sum frac if fromProvince==1, detail
sum frac2 if fromProvince==1, detail

sort city05 fromProvince fromProvince2
merge 1:1 city05 fromProvince fromProvince2 using temp90.dta
*1 is 2005 flows that did not occur in 1990
*2 is 1990 flows that did not occur in 2005
*3 is flows that occurred in both years
drop _merge

*** Predicted flow = (origin total minus own flow) * 1990 share
foreach X of varlist ruralMig-mig18to55Edu9Greater {
	* Pathways seen only in 1990 arrive from temp90.dta with a missing 2005
	* count.  That count is a true zero; left missing it would make the
	* prediction missing and the collapse below would drop it.
	replace `X' = 0 if `X'==.
	egen `X't = total(`X'), by(fromProvince)
	gen `X'p05 = (`X't-`X')*`X'f90
	replace `X'p05 = 0 if fromProvince==1
	egen `X'2t = total(`X'), by(fromProvince2)
	gen `X'pa05 = (`X'2t-`X')*`X'fa90
}

*** Pathways seen only in 2005 have a missing 1990 share, so their
*** prediction is missing and adds nothing to the sums
collapse (sum) *p05 *pa05, by(city05)
drop if city05==.

*** Combine the 2005 predictions in memory with the 2000 predictions
*** (card00.dta) and the migrant totals for all three years (totals.dta)
sort city05
merge 1:1 city05 using card00.dta
drop _merge

sort city05
merge 1:1 city05 using totals.dta
drop _merge

**** Label Variables
**** Totals exist for 90, 00 and 05; predictions only for 00 and 05
#delimit ;
foreach X in ruralMig urbanMig mig18to55 mig18Less mig55Greater mig18to55Agr 
mig18to55NonAgr mig18to55Ind mig18to55Ser mig18to55AgrEdu9LessE
mig18to55AgrEdu9Greater mig18to55NonAgrEdu9LessE mig18to55NonAgrEdu9Greater
mig18to55Edu9LessE mig18to55Edu9Greater {;
	foreach yy in "90" "00" "05" {;
		label variable `X't`yy' "total migrants, including in province, past 5 yrs";
		label variable `X'op`yy' "total out of province migrants, past 5 yrs";
	};
	foreach yy in "00" "05" {;
		label variable `X'p`yy' "total predicted out of province migrants, past 5 yrs";
		label variable `X'pa`yy' "total predicted migrants, including in province, past 5 yrs";
	};
};
#delimit cr

*** This is regions outside of our study area
*** (totals.dta still carries a row for missing city05)
drop if city05==.

sort city05
** Forward slashes so the path also resolves outside Windows
save ../../data/census/generated/card-inst.dta, replace


*** Delete the scratch files written above, in the same order as before
*** (temp_cencp.dta is not in this list and stays on disk)
foreach scratch in temp90 card00 temp totals {
	erase `scratch'.dta
}
